# coding: utf-8
import argparse
import os



# Command-line interface: the directory to process and the number of words
# to put on each output line.
parser = argparse.ArgumentParser(description="split English txt.bak")
parser.add_argument("--bakPath", default="", type=str, help="bakPath")
# The default has to be an int: argparse passes *string* defaults through
# ``type``, so the previous default of "" made every run that omitted
# --wordNum abort with "invalid int value: ''".  50 mirrors the default of
# the ``num`` parameter used by the functions in this file.
parser.add_argument("--wordNum", default=50, type=int, help="wordNum")
opt = parser.parse_args()


# Module-level aliases read by the driver calls at the bottom of the file.
Num = opt.wordNum
filepath=opt.bakPath

def remove_space(filepath,num=50):
    """Collapse every ``.bak`` file under *filepath* into a one-line temp file.

    The directory tree rooted at *filepath* is walked with ``os.walk``.  For
    each file whose name ends in ``.bak`` a sibling file named
    ``'temp' + <name without its last extension>`` is written in the same
    directory.  Every line of the source is stripped of surrounding
    whitespace, blank lines are dropped, and the remaining lines are written
    one after another, each followed by a single space, so the temp file
    contains no newlines.

    Args:
        filepath: Root directory to scan.  A trailing path separator is
            optional.
        num: Unused; kept so existing callers that pass it keep working.
    """
    suffix = '.bak'
    for dirpath, _dirnames, filenames in os.walk(filepath):
        for name in filenames:
            # Match on the real extension: the stem computed below only
            # strips the last extension, so a substring match such as
            # "x.bak.old" would yield a temp file that itself ends in .bak.
            if not name.endswith(suffix):
                continue
            stem = os.path.splitext(name)[0]
            # Join against the directory os.walk is currently visiting so
            # files in subdirectories resolve correctly and the root does
            # not need to end with a separator.
            src = os.path.join(dirpath, name)
            dst = os.path.join(dirpath, 'temp' + stem)
            # Read-only / write-only modes are sufficient; the ``with``
            # statement closes both handles.
            with open(src, 'r', encoding='utf-8') as f, \
                    open(dst, 'w', encoding='utf-8') as new_f:
                for line in f:
                    line = line.strip()
                    if line:
                        new_f.write(line + ' ')

def splitEnText(filepath,num = 50):
    """Re-wrap every ``temp*`` file under *filepath* at *num* words per line.

    The directory tree rooted at *filepath* is walked with ``os.walk``.  Each
    file whose name starts with ``temp`` is read, split into whitespace
    separated words, and written to a sibling file whose name is the
    original name without the ``temp`` prefix.  Every output chunk holds
    exactly *num* words, each followed by a space; chunks are separated by a
    newline, except that the last chunk of an input line gets none.  The temp
    file is deleted afterwards and a "finish" message is printed.

    NOTE(review): words left over after the last complete chunk of a line
    (and whole lines shorter than *num* words) are discarded, exactly as
    before -- confirm that this truncation is intended.

    Args:
        filepath: Root directory to scan.  A trailing path separator is
            optional.
        num: Number of words per output chunk; must be positive.

    Raises:
        ValueError: If *num* is not positive.
    """
    # Reject a non-positive chunk size before touching any file; otherwise
    # the temp files would be consumed while producing empty output.
    if num <= 0:
        raise ValueError("num must be a positive integer, got %r" % (num,))

    prefix = 'temp'
    for dirpath, _dirnames, filenames in os.walk(filepath):
        for name in filenames:
            outname = name[len(prefix):]
            # Require a real prefix (not just "temp" somewhere in the name)
            # so unrelated files such as "attempt.log" are never rewritten
            # and deleted; skip a bare "temp", which has no output name.
            if not name.startswith(prefix) or not outname:
                continue
            src = os.path.join(dirpath, name)
            dst = os.path.join(dirpath, outname)

            with open(src, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            chunks = []
            for line in lines:
                words = line.split()
                full = len(words) // num
                for j in range(full):
                    s = ' '.join(words[j * num:(j + 1) * num]) + ' '
                    # The last chunk of a line gets no trailing newline.
                    if j != full - 1:
                        s += '\n'
                    # Very short chunks (5 characters or fewer, counting
                    # the separators) are dropped.
                    if len(s) > 5:
                        chunks.append(s)

            with open(dst, 'w', encoding='utf-8') as fo:
                fo.writelines(chunks)
            os.remove(src)
            print(dst + "  finish")
                

# Stage 1: flatten each .bak file under the target directory into a temp file.
remove_space(filepath)
# Stage 2: re-wrap the temp files at Num words per line and delete them.
splitEnText(filepath, Num)
